<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html lang="en" dir="ltr" xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>Natural language processing - Wikipedia, the free encyclopedia</title>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
<meta http-equiv="Content-Style-Type" content="text/css" />
<meta name="generator" content="MediaWiki 1.17wmf1" />
<meta name="robots" content="noindex,follow" />
<link rel="alternate" type="application/x-wiki" title="Edit this page" href="/w/index.php?title=Natural_language_processing&amp;action=edit" />
<link rel="edit" title="Edit this page" href="/w/index.php?title=Natural_language_processing&amp;action=edit" />
<link rel="apple-touch-icon" href="http://en.wikipedia.org/apple-touch-icon.png" />
<link rel="shortcut icon" href="/favicon.ico" />
<link rel="search" type="application/opensearchdescription+xml" href="/w/opensearch_desc.php" title="Wikipedia (en)" />
<link rel="EditURI" type="application/rsd+xml" href="http://en.wikipedia.org/w/api.php?action=rsd" />
<link rel="copyright" href="http://creativecommons.org/licenses/by-sa/3.0/" />
<link rel="alternate" type="application/atom+xml" title="Wikipedia Atom feed" href="/w/index.php?title=Special:RecentChanges&amp;feed=atom" />
<link rel="stylesheet" href="http://bits.wikimedia.org/en.wikipedia.org/load.php?debug=false&amp;lang=en&amp;modules=ext%21wikihiero%7Cmediawiki%21legacy%21commonPrint%7Cmediawiki%21legacy%21shared%7Cskins%21vector&amp;only=styles&amp;printable=1&amp;skin=vector" type="text/css" media="all" />
<meta name="ResourceLoaderDynamicStyles" content="" /><link rel="stylesheet" href="http://bits.wikimedia.org/en.wikipedia.org/load.php?debug=false&amp;lang=en&amp;modules=site&amp;only=styles&amp;printable=1&amp;skin=vector" type="text/css" media="all" />
<style type="text/css" media="all">a.new,#quickbar a.new{color:#ba0000}

/* cache key: enwiki:resourceloader:filter:minify-css:5:f2a9127573a22335c2a9102b208c73e7 */</style>
<script src="http://bits.wikimedia.org/en.wikipedia.org/load.php?debug=false&amp;lang=en&amp;modules=startup&amp;only=scripts&amp;printable=1&amp;skin=vector" type="text/javascript"></script>
<script type="text/javascript">if ( window.mediaWiki ) {
	mediaWiki.config.set({"wgCanonicalNamespace": "", "wgCanonicalSpecialPageName": false, "wgNamespaceNumber": 0, "wgPageName": "Natural_language_processing", "wgTitle": "Natural language processing", "wgAction": "view", "wgArticleId": 21652, "wgIsArticle": true, "wgUserName": null, "wgUserGroups": ["*"], "wgCurRevisionId": 450080870, "wgCategories": ["Articles needing additional references from July 2008", "All articles needing additional references", "Wikipedia articles needing reorganization from July 2008", "Computational linguistics", "Speech recognition", "Natural language processing"], "wgBreakFrames": false, "wgRestrictionEdit": [], "wgRestrictionMove": [], "wgSearchNamespaces": [0], "wgFlaggedRevsParams": {"tags": {"status": {"levels": 1, "quality": 2, "pristine": 3}}}, "wgStableRevisionId": null, "wgRevContents": {"error": "Unable to get content.", "waiting": "Waiting for content"}, "wgVectorEnabledModules": {"collapsiblenav": true, "collapsibletabs": true, "editwarning": true, "expandablesearch": false, "footercleanup": false, "sectioneditlinks": false, "simplesearch": true, "experiments": true}, "wgWikiEditorEnabledModules": {"toolbar": true, "dialogs": true, "templateEditor": false, "templates": false, "addMediaWizard": false, "preview": false, "previewDialog": false, "publish": false, "toc": false}, "wgTrackingToken": "4ef74d989bbdb781899a79cfb6184f0d", "wikilove-recipient": "", "wikilove-edittoken": "+\\", "wikilove-anon": 0, "mbEditToken": "+\\", "Geo": {"city": "", "country": ""}, "wgNoticeProject": "wikipedia"});
}
</script>

<!--[if lt IE 7]><style type="text/css">body{behavior:url("/w/skins-1.17/vector/csshover.min.htc")}</style><![endif]--></head>
<body class="mediawiki ltr ns-0 ns-subject page-Natural_language_processing skin-vector">
		<div id="mw-page-base" class="noprint"></div>
		<div id="mw-head-base" class="noprint"></div>
		<!-- content -->
		<div id="content">
			<a id="top"></a>
			<div id="mw-js-message" style="display:none;"></div>
						<!-- sitenotice -->
			<div id="siteNotice"><!-- centralNotice loads here --></div>
			<!-- /sitenotice -->
						<!-- firstHeading -->
			<h1 id="firstHeading" class="firstHeading">Natural language processing</h1>
			<!-- /firstHeading -->
			<!-- bodyContent -->
			<div id="bodyContent">
				<!-- tagline -->
				<div id="siteSub">From Wikipedia, the free encyclopedia</div>
				<!-- /tagline -->
				<!-- subtitle -->
				<div id="contentSub"></div>
				<!-- /subtitle -->
																<!-- jumpto -->
				<div id="jump-to-nav">
					Jump to: <a href="#mw-head">navigation</a>,
					<a href="#p-search">search</a>
				</div>
				<!-- /jumpto -->
								<!-- bodytext -->
				<table class="metadata plainlinks ambox ambox-content" style="">
<tr>
<td class="mbox-image">
<div style="width: 52px;"><img alt="" src="http://upload.wikimedia.org/wikipedia/en/f/f4/Ambox_content.png" width="40" height="40" /></div>
</td>
<td class="mbox-text" style="">
<div>
<div style="text-align:left;"><b>This article has multiple issues</b>. Please help <b><a href="http://en.wikipedia.org/w/index.php?title=Natural_language_processing&amp;action=edit" class="external text" rel="nofollow">improve it</a></b> or discuss these issues on the <b><a href="/wiki/Talk:Natural_language_processing" title="Talk:Natural language processing">talk page</a></b>.</div>
<ul>
<li>It <b>needs additional <a href="/wiki/Wikipedia:Citing_sources#Inline_citations" title="Wikipedia:Citing sources">citations</a> for <a href="/wiki/Wikipedia:Verifiability" title="Wikipedia:Verifiability">verification</a></b>. <small>Tagged since July 2008.</small></li>
<li>It <b>may be in need of reorganization to comply with Wikipedia's <a href="/wiki/Wikipedia:Layout" title="Wikipedia:Layout" class="mw-redirect">layout guidelines</a></b>. <small>Tagged since July 2008.</small></li>
</ul>
</div>
</td>
</tr>
</table>
<p><b>Natural language processing</b> (<b>NLP</b>) is a field of <a href="/wiki/Computer_science" title="Computer science">computer science</a> and <a href="/wiki/Linguistics" title="Linguistics">linguistics</a> concerned with the interactions between computers and human (natural) languages; it began as a branch of artificial intelligence.<sup id="cite_ref-0" class="reference"><a href="#cite_note-0"><span>[</span>1<span>]</span></a></sup> In theory, natural language processing is a very attractive method of <a href="/wiki/Human%E2%80%93computer_interaction" title="Human–computer interaction">human–computer interaction</a>. <a href="/wiki/Natural_language_understanding" title="Natural language understanding">Natural language understanding</a> is sometimes referred to as an <a href="/wiki/AI-complete" title="AI-complete">AI-complete</a> problem because it seems to require extensive knowledge about the outside world and the ability to manipulate it.</p>
<p>Whether NLP is distinct from, or identical to, the field of <a href="/wiki/Computational_linguistics" title="Computational linguistics">computational linguistics</a> is a matter of perspective. The <a href="/wiki/Association_for_Computational_Linguistics" title="Association for Computational Linguistics">Association for Computational Linguistics</a> defines the latter as focusing on the theoretical aspects of NLP. On the other hand, the open-access journal "Computational Linguistics" styles itself as "the longest running publication devoted exclusively to the design and analysis of natural language processing systems" <a href="http://www.mitpressjournals.org/loi/coli" class="external text" rel="nofollow">(Computational Linguistics (Journal))</a>.</p>
<p>Modern NLP algorithms are grounded in <a href="/wiki/Machine_learning" title="Machine learning">machine learning</a>, especially <a href="/wiki/Statistical_inference" title="Statistical inference">statistical</a> machine learning. Research into modern statistical NLP algorithms requires an understanding of a number of disparate fields, including <a href="/wiki/Linguistics" title="Linguistics">linguistics</a>, <a href="/wiki/Computer_science" title="Computer science">computer science</a>, and <a href="/wiki/Statistics" title="Statistics">statistics</a>. For a discussion of the types of algorithms currently used in NLP, see the article on <a href="/wiki/Pattern_recognition" title="Pattern recognition">pattern recognition</a>.</p>
<div class="thumb tright">
<div class="thumbinner" style="width:172px;"><a href="/wiki/File:Automated_online_assistant.png" class="image"><img alt="" src="http://upload.wikimedia.org/wikipedia/commons/thumb/8/8b/Automated_online_assistant.png/170px-Automated_online_assistant.png" width="170" height="213" class="thumbimage" /></a>
<div class="thumbcaption">
<div class="magnify"><a href="/wiki/File:Automated_online_assistant.png" class="internal" title="Enlarge"><img src="http://bits.wikimedia.org/skins-1.17/common/images/magnify-clip.png" width="15" height="11" alt="" /></a></div>
An <a href="/wiki/Automated_online_assistant" title="Automated online assistant">automated online assistant</a> providing <a href="/wiki/Customer_service" title="Customer service">customer service</a> on a web page, an example of an application where natural language processing is a major component.<sup id="cite_ref-Kongthon_1-0" class="reference"><a href="#cite_note-Kongthon-1"><span>[</span>2<span>]</span></a></sup></div>
</div>
</div>
<table id="toc" class="toc">
<tr>
<td>
<div id="toctitle">
<h2>Contents</h2>
</div>
<ul>
<li class="toclevel-1 tocsection-1"><a href="#History"><span class="tocnumber">1</span> <span class="toctext">History</span></a></li>
<li class="toclevel-1 tocsection-2"><a href="#NLP_using_machine_learning"><span class="tocnumber">2</span> <span class="toctext">NLP using machine learning</span></a></li>
<li class="toclevel-1 tocsection-3"><a href="#Major_tasks_in_NLP"><span class="tocnumber">3</span> <span class="toctext">Major tasks in NLP</span></a></li>
<li class="toclevel-1 tocsection-4"><a href="#Statistical_NLP"><span class="tocnumber">4</span> <span class="toctext">Statistical NLP</span></a></li>
<li class="toclevel-1 tocsection-5"><a href="#Evaluation_of_natural_language_processing"><span class="tocnumber">5</span> <span class="toctext">Evaluation of natural language processing</span></a>
<ul>
<li class="toclevel-2 tocsection-6"><a href="#Objectives"><span class="tocnumber">5.1</span> <span class="toctext">Objectives</span></a></li>
<li class="toclevel-2 tocsection-7"><a href="#Short_history_of_evaluation_in_NLP"><span class="tocnumber">5.2</span> <span class="toctext">Short history of evaluation in NLP</span></a></li>
<li class="toclevel-2 tocsection-8"><a href="#Different_types_of_evaluation"><span class="tocnumber">5.3</span> <span class="toctext">Different types of evaluation</span></a></li>
<li class="toclevel-2 tocsection-9"><a href="#Shared_tasks_.28Campaigns.29"><span class="tocnumber">5.4</span> <span class="toctext">Shared tasks (Campaigns)</span></a></li>
</ul>
</li>
<li class="toclevel-1 tocsection-10"><a href="#Standardization_in_NLP"><span class="tocnumber">6</span> <span class="toctext">Standardization in NLP</span></a></li>
<li class="toclevel-1 tocsection-11"><a href="#Journals"><span class="tocnumber">7</span> <span class="toctext">Journals</span></a></li>
<li class="toclevel-1 tocsection-12"><a href="#Organizations_and_conferences"><span class="tocnumber">8</span> <span class="toctext">Organizations and conferences</span></a>
<ul>
<li class="toclevel-2 tocsection-13"><a href="#Associations"><span class="tocnumber">8.1</span> <span class="toctext">Associations</span></a></li>
<li class="toclevel-2 tocsection-14"><a href="#Conferences"><span class="tocnumber">8.2</span> <span class="toctext">Conferences</span></a></li>
</ul>
</li>
<li class="toclevel-1 tocsection-15"><a href="#Software_tools"><span class="tocnumber">9</span> <span class="toctext">Software tools</span></a></li>
<li class="toclevel-1 tocsection-16"><a href="#See_also"><span class="tocnumber">10</span> <span class="toctext">See also</span></a></li>
<li class="toclevel-1 tocsection-17"><a href="#References"><span class="tocnumber">11</span> <span class="toctext">References</span></a>
<ul>
<li class="toclevel-2 tocsection-18"><a href="#Related_academic_articles"><span class="tocnumber">11.1</span> <span class="toctext">Related academic articles</span></a></li>
</ul>
</li>
<li class="toclevel-1 tocsection-19"><a href="#External_links"><span class="tocnumber">12</span> <span class="toctext">External links</span></a>
<ul>
<li class="toclevel-2 tocsection-20"><a href="#Resources"><span class="tocnumber">12.1</span> <span class="toctext">Resources</span></a></li>
<li class="toclevel-2 tocsection-21"><a href="#Organizations"><span class="tocnumber">12.2</span> <span class="toctext">Organizations</span></a></li>
</ul>
</li>
</ul>
</td>
</tr>
</table>
<h2><span class="mw-headline" id="History">History</span></h2>
<div class="rellink relarticle mainarticle">Main article: <a href="/wiki/History_of_natural_language_processing" title="History of natural language processing">History of natural language processing</a></div>
<p>The history of NLP generally starts in the 1950s, although work can be found from earlier periods. In 1950, <a href="/wiki/Alan_Turing" title="Alan Turing">Alan Turing</a> published his famous article "<a href="/wiki/Computing_Machinery_and_Intelligence" title="Computing Machinery and Intelligence">Computing Machinery and Intelligence</a>" which proposed what is now called the <a href="/wiki/Turing_test" title="Turing test">Turing test</a> as a criterion of intelligence. This criterion depends on the ability of a computer program to impersonate a human in a real-time written conversation with a human judge, sufficiently well that the judge is unable to distinguish reliably&#160;— on the basis of the conversational content alone&#160;— between the program and a real human. The <a href="/wiki/Georgetown-IBM_experiment" title="Georgetown-IBM experiment">Georgetown experiment</a> in 1954 involved fully automatic translation of more than sixty Russian sentences into English. The authors claimed that within three or five years, machine translation would be a solved problem.<sup id="cite_ref-2" class="reference"><a href="#cite_note-2"><span>[</span>3<span>]</span></a></sup> However, real progress was much slower, and after the <a href="/wiki/ALPAC" title="ALPAC">ALPAC report</a> in 1966, which found that ten years of research had failed to fulfill expectations, funding for machine translation was dramatically reduced. Little further research in machine translation was conducted until the late 1980s, when the first <a href="/wiki/Statistical_machine_translation" title="Statistical machine translation">statistical machine translation</a> systems were developed.</p>
<p>Some notably successful NLP systems developed in the 1960s were <a href="/wiki/SHRDLU" title="SHRDLU">SHRDLU</a>, a natural language system working in restricted "<a href="/wiki/Blocks_world" title="Blocks world">blocks worlds</a>" with restricted vocabularies, and <a href="/wiki/ELIZA" title="ELIZA">ELIZA</a>, a simulation of a <a href="/wiki/Rogerian_psychotherapy" title="Rogerian psychotherapy" class="mw-redirect">Rogerian psychotherapist</a>, written by <a href="/wiki/Joseph_Weizenbaum" title="Joseph Weizenbaum">Joseph Weizenbaum</a> between 1964 and 1966. Using almost no information about human thought or emotion, ELIZA sometimes provided a startlingly human-like interaction. When the "patient" exceeded the very small knowledge base, ELIZA might provide a generic response, for example, responding to "My head hurts" with "Why do you say your head hurts?".</p>
<p>During the 1970s, many programmers began to write 'conceptual ontologies', which structured real-world information into computer-understandable data. Examples are MARGIE (Schank, 1975), SAM (Cullingford, 1978), PAM (Wilensky, 1978), TaleSpin (Meehan, 1976), QUALM (Lehnert, 1977), Politics (Carbonell, 1979), and Plot Units (Lehnert 1981). During this time, many <a href="/wiki/Chatterbots" title="Chatterbots" class="mw-redirect">chatterbots</a> were written including <a href="/wiki/PARRY" title="PARRY">PARRY</a>, <a href="/wiki/Racter" title="Racter">Racter</a>, and <a href="/wiki/Jabberwacky" title="Jabberwacky">Jabberwacky</a>.</p>
<p>Up to the 1980s, most NLP systems were based on complex sets of hand-written rules. Starting in the late 1980s, however, there was a revolution in NLP with the introduction of <a href="/wiki/Machine_learning" title="Machine learning">machine learning</a> algorithms for language processing. This was due both to the steady increase in computational power resulting from <a href="/wiki/Moore%27s_Law" title="Moore's Law" class="mw-redirect">Moore's Law</a> and the gradual lessening of the dominance of <a href="/wiki/Noam_Chomsky" title="Noam Chomsky">Chomskyan</a> theories of linguistics (e.g. <a href="/wiki/Transformational_grammar" title="Transformational grammar">transformational grammar</a>), whose theoretical underpinnings discouraged the sort of <a href="/wiki/Corpus_linguistics" title="Corpus linguistics">corpus linguistics</a> that underlies the machine-learning approach to language processing.<sup id="cite_ref-3" class="reference"><a href="#cite_note-3"><span>[</span>4<span>]</span></a></sup> Some of the earliest-used machine learning algorithms, such as <a href="/wiki/Decision_tree" title="Decision tree">decision trees</a>, produced systems of hard if-then rules similar to existing hand-written rules. Increasingly, however, research has focused on <a href="/wiki/Statistical_natural_language_processing" title="Statistical natural language processing" class="mw-redirect">statistical models</a>, which make soft, <a href="/wiki/Probabilistic" title="Probabilistic" class="mw-redirect">probabilistic</a> decisions based on attaching <a href="/wiki/Real-valued" title="Real-valued" class="mw-redirect">real-valued</a> weights to the features making up the input data. Such models are generally more robust when given unfamiliar input, especially input that contains errors (as is very common for real-world data), and produce more reliable results when integrated into a larger system comprising multiple subtasks.</p>
<p>Many of the notable early successes occurred in the field of <a href="/wiki/Machine_translation" title="Machine translation">machine translation</a>, due especially to work at IBM Research, where successively more complicated statistical models were developed. These systems were able to take advantage of existing multilingual <a href="/wiki/Text_corpus" title="Text corpus">textual corpora</a> that had been produced by the <a href="/wiki/Parliament_of_Canada" title="Parliament of Canada">Parliament of Canada</a> and the <a href="/wiki/European_Union" title="European Union">European Union</a> as a result of laws calling for the translation of all governmental proceedings into all official languages of the corresponding systems of government. However, most other systems depended on corpora specifically developed for the tasks implemented by these systems, which was (and often continues to be) a major limitation in the success of these systems. As a result, a great deal of research has gone into methods of more effectively learning from limited amounts of data.</p>
<p>Recent research has increasingly focused on <a href="/wiki/Unsupervised_learning" title="Unsupervised learning">unsupervised</a> and <a href="/wiki/Semi-supervised_learning" title="Semi-supervised learning">semi-supervised</a> learning algorithms. Such algorithms are able to learn from data that has not been hand-annotated with the desired answers, or using a combination of annotated and non-annotated data. Generally, this task is much more difficult than <a href="/wiki/Supervised_learning" title="Supervised learning">supervised learning</a>, and typically produces less accurate results for a given amount of input data. However, there is an enormous amount of non-annotated data available (including, among other things, the entire content of the <a href="/wiki/World_Wide_Web" title="World Wide Web">World Wide Web</a>), which can often make up for the inferior results.</p>
<h2><span class="mw-headline" id="NLP_using_machine_learning">NLP using machine learning</span></h2>
<p>As described above, modern approaches to natural language processing (NLP) are grounded in <a href="/wiki/Machine_learning" title="Machine learning">machine learning</a>. The paradigm of machine learning is different from that of most prior attempts at language processing. Prior implementations of language-processing tasks typically involved the direct hand coding of large sets of rules. The machine-learning paradigm calls instead for using general learning algorithms&#160;— often, although not always, grounded in <a href="/wiki/Statistical_inference" title="Statistical inference">statistical inference</a>&#160;— to automatically learn such rules through the analysis of large <i><a href="/wiki/Text_corpus" title="Text corpus">corpora</a></i> of typical real-world examples. A <i>corpus</i> (plural, "corpora") is a set of documents (or sometimes, individual sentences) that have been hand-annotated with the correct values to be learned.</p>
<p>As an example, consider the task of <a href="/wiki/Part_of_speech_tagging" title="Part of speech tagging" class="mw-redirect">part of speech tagging</a>, i.e. determining the correct part of speech of each word in a given sentence, typically one that has never been seen before. A typical machine-learning-based implementation of a <a href="/wiki/Part_of_speech_tagger" title="Part of speech tagger" class="mw-redirect">part of speech tagger</a> proceeds in two steps, a <i>training</i> step and an <i>evaluation</i> step. The first step&#160;— the <i>training</i> step&#160;— makes use of a corpus of <i>training data</i>, which consists of a large number of sentences, each of which has the correct part of speech attached to each word. (An example of such a corpus in common use is the <i>Penn Treebank</i>. This includes (among other things) a set of 500 texts from the <a href="/wiki/Brown_Corpus" title="Brown Corpus">Brown Corpus</a>, containing examples of various genres of text, and 2500 articles from the <a href="/wiki/Wall_Street_Journal" title="Wall Street Journal" class="mw-redirect">Wall Street Journal</a>.) This corpus is analyzed and a learning model is generated from it, consisting of automatically created rules for determining the part of speech for a word in a sentence, typically based on the nature of the word in question, the nature of surrounding words, and the most likely part of speech for those surrounding words. The model that is generated is typically the best model that can be found that simultaneously meets two conflicting objectives: To perform as well as possible on the training data, and to be as simple as possible (so that the model avoids <a href="/wiki/Overfitting" title="Overfitting">overfitting</a> the training data, i.e. so that it generalizes as well as possible to new data rather than only succeeding on sentences that have already been seen). 
In the second step (the evaluation step), the model that has been learned is used to process new sentences. An important part of the development of any learning algorithm is testing the model that has been learned on new, previously unseen data. It is critical that the data used for testing is not the same as the data used for training; otherwise, the testing accuracy will be unrealistically high.</p>
<p>Many different classes of machine learning algorithms have been applied to NLP tasks. Common to all of these algorithms is that they take as input a large set of "features" that are generated from the input data. As an example, for a <a href="/wiki/Part-of-speech_tagger" title="Part-of-speech tagger" class="mw-redirect">part-of-speech tagger</a>, typical features might be the identity of the word being processed, the identity of the words immediately to the left and right, the part-of-speech tag of the word to the left, and whether the word being considered or its immediate neighbors are <a href="/wiki/Content_word" title="Content word" class="mw-redirect">content words</a> or <a href="/wiki/Function_word" title="Function word">function words</a>. The algorithms differ, however, in the nature of the rules generated. Some of the earliest-used algorithms, such as <a href="/wiki/Decision_tree" title="Decision tree">decision trees</a>, produced systems of hard if-then rules similar to the systems of hand-written rules that were then common. Increasingly, however, research has focused on <a href="/wiki/Statistical_natural_language_processing" title="Statistical natural language processing" class="mw-redirect">statistical models</a>, which make soft, <a href="/wiki/Probabilistic" title="Probabilistic" class="mw-redirect">probabilistic</a> decisions based on attaching <a href="/wiki/Real-valued" title="Real-valued" class="mw-redirect">real-valued</a> weights to each input feature. Such models have the advantage that they can express the relative certainty of many different possible answers rather than only one, producing more reliable results when such a model is included as a component of a larger system. In addition, models that make soft decisions are generally more robust when given unfamiliar input, especially input that contains errors (as is very common for real-world data).</p>
<p>Systems based on machine-learning algorithms have many advantages over hand-produced rules:</p>
<ul>
<li>The learning procedures used during machine learning automatically focus on the most common cases, whereas when writing rules by hand it is often not obvious at all where the effort should be directed.</li>
<li>Automatic learning procedures can make use of <a href="/wiki/Statistical_inference" title="Statistical inference">statistical inference</a> algorithms to produce models that are robust to unfamiliar input (e.g. containing words or structures that have not been seen before) and to erroneous input (e.g. with misspelled words or words accidentally omitted). Generally, handling such input gracefully with hand-written rules&#160;— or more generally, creating systems of hand-written rules that make soft decisions&#160;— is extremely difficult and error-prone.</li>
<li>Systems based on automatically learning the rules can be made more accurate simply by supplying more input data. However, systems based on hand-written rules can only be made more accurate by increasing the complexity of the rules, which is a much more difficult task. In particular, there is a limit to the complexity of systems based on hand-crafted rules, beyond which the systems become more and more unmanageable. However, creating more data to input to machine-learning systems simply requires a corresponding increase in the number of man-hours worked, generally without significant increases in the complexity of the annotation process.</li>
</ul>
<h2><span class="mw-headline" id="Major_tasks_in_NLP">Major tasks in NLP</span></h2>
<p>The following is a list of some of the most commonly researched tasks in NLP. Note that some of these tasks have direct real-world applications, while others more commonly serve as subtasks that are used to aid in solving larger tasks. What distinguishes these tasks from other potential and actual NLP tasks is not only the volume of research devoted to them but the fact that for each one there is typically a well-defined problem setting, a standard metric for evaluating the task, standard <a href="/wiki/Corpora" title="Corpora" class="mw-redirect">corpora</a> on which the task can be evaluated, and competitions devoted to the specific task.</p>
<ul>
<li><a href="/wiki/Automatic_summarization" title="Automatic summarization">Automatic summarization</a>: Produce a readable summary of a chunk of text. Often used to provide summaries of text of a known type, such as articles in the financial section of a newspaper.</li>
<li><a href="/w/index.php?title=Coreference_resolution&amp;action=edit&amp;redlink=1" class="new" title="Coreference resolution (page does not exist)">Coreference resolution</a>: Given a sentence or larger chunk of text, determine which words ("mentions") refer to the same objects ("entities"). <a href="/wiki/Anaphora_resolution" title="Anaphora resolution" class="mw-redirect">Anaphora resolution</a> is a specific example of this task, and is specifically concerned with matching up <a href="/wiki/Pronoun" title="Pronoun">pronouns</a> with the nouns or names that they refer to. The more general task of coreference resolution also includes identifying so-called "bridging relationships" involving <a href="/wiki/Referring_expression" title="Referring expression">referring expressions</a>. For example, in a sentence such as "He entered John's house through the front door", "the front door" is a referring expression and the bridging relationship to be identified is the fact that the door being referred to is the front door of John's house (rather than of some other structure that might also be referred to).</li>
<li><a href="/wiki/Discourse_analysis" title="Discourse analysis">Discourse analysis</a>: This rubric includes a number of related tasks. One task is identifying the <a href="/w/index.php?title=Discourse_structure&amp;action=edit&amp;redlink=1" class="new" title="Discourse structure (page does not exist)">discourse structure</a> of connected text, i.e. the nature of the discourse relationships between sentences (e.g. elaboration, explanation, contrast). Another possible task is recognizing and classifying the <a href="/wiki/Speech_act" title="Speech act">speech acts</a> in a chunk of text (e.g. yes-no question, content question, statement, assertion, etc.).</li>
<li><a href="/wiki/Machine_translation" title="Machine translation">Machine translation</a>: Automatically translate text from one human language to another. This is one of the most difficult problems, and is a member of a class of problems colloquially termed "<a href="/wiki/AI-complete" title="AI-complete">AI-complete</a>", i.e. requiring all of the different types of knowledge that humans possess (grammar, semantics, facts about the real world, etc.) in order to solve properly.</li>
<li><a href="/w/index.php?title=Morphological_segmentation&amp;action=edit&amp;redlink=1" class="new" title="Morphological segmentation (page does not exist)">Morphological segmentation</a>: Separate words into individual <a href="/wiki/Morphemes" title="Morphemes" class="mw-redirect">morphemes</a> and identify the class of the morphemes. The difficulty of this task depends greatly on the complexity of the <a href="/wiki/Morphology_(linguistics)" title="Morphology (linguistics)">morphology</a> (i.e. the structure of words) of the language being considered. <a href="/wiki/English_language" title="English language">English</a> has fairly simple morphology, especially <a href="/wiki/Inflectional_morphology" title="Inflectional morphology" class="mw-redirect">inflectional morphology</a>, and thus it is often possible to ignore this task entirely and simply model all possible forms of a word (e.g. "open, opens, opened, opening") as separate words. In languages such as <a href="/wiki/Turkish_language" title="Turkish language">Turkish</a>, however, such an approach is not possible, as each dictionary entry has thousands of possible word forms.</li>
<li><a href="/wiki/Named_entity_recognition" title="Named entity recognition">Named entity recognition</a> (NER): Given a stream of text, determine which items in the text map to proper names, such as people or places, and what the type of each such name is (e.g. person, location, organization). Note that, although <a href="/wiki/Capitalization" title="Capitalization">capitalization</a> can aid in recognizing named entities in languages such as English, this information cannot aid in determining the type of named entity, and in any case is often inaccurate or insufficient. For example, the first word of a sentence is also capitalized, and named entities often span several words, only some of which are capitalized. Furthermore, many other languages in non-Western scripts (e.g. <a href="/wiki/Chinese_language" title="Chinese language">Chinese</a> or <a href="/wiki/Arabic_language" title="Arabic language">Arabic</a>) do not have any capitalization at all, and even languages with capitalization may not consistently use it to distinguish names. For example, <a href="/wiki/German_language" title="German language">German</a> capitalizes all <a href="/wiki/Noun" title="Noun">nouns</a>, regardless of whether they refer to names, and <a href="/wiki/French_language" title="French language">French</a> and <a href="/wiki/Spanish_language" title="Spanish language">Spanish</a> do not capitalize names that serve as <a href="/wiki/Adjective" title="Adjective">adjectives</a>.</li>
<li><a href="/wiki/Natural_language_generation" title="Natural language generation">Natural language generation</a>: Convert information from computer databases into readable human language.</li>
<li><a href="/wiki/Natural_language_understanding" title="Natural language understanding">Natural language understanding</a>: Convert chunks of text into more formal representations such as <a href="/wiki/First-order_logic" title="First-order logic">first-order logic</a> structures that are easier for <a href="/wiki/Computer" title="Computer">computer</a> programs to manipulate. Natural language understanding involves the identification of the intended semantics from the multiple possible semantics that can be derived from a natural language expression, which usually takes the form of organized notations of natural-language concepts. The introduction and creation of a language metamodel and ontology are efficient, though empirical, solutions. An explicit formalization of natural-language semantics, free of confusion with implicit assumptions such as the closed world assumption (CWA) vs. the open world assumption, or subjective Yes/No vs. objective True/False, is expected to serve as the basis for a formalization of semantics.<sup id="cite_ref-4" class="reference"><a href="#cite_note-4"><span>[</span>5<span>]</span></a></sup></li>
</ul>
<ul>
<li><a href="/wiki/Optical_character_recognition" title="Optical character recognition">Optical character recognition</a> (OCR): Given an image representing printed text, determine the corresponding text.</li>
<li><a href="/wiki/Part-of-speech_tagging" title="Part-of-speech tagging">Part-of-speech tagging</a>: Given a sentence, determine the <a href="/wiki/Part_of_speech" title="Part of speech">part of speech</a> for each word. Many words, especially common ones, can serve as multiple <a href="/wiki/Parts_of_speech" title="Parts of speech" class="mw-redirect">parts of speech</a>. For example, "book" can be a <a href="/wiki/Noun" title="Noun">noun</a> ("the book on the table") or <a href="/wiki/Verb" title="Verb">verb</a> ("to book a flight"); "set" can be a <a href="/wiki/Noun" title="Noun">noun</a>, <a href="/wiki/Verb" title="Verb">verb</a> or <a href="/wiki/Adjective" title="Adjective">adjective</a>; and "out" can be any of at least five different parts of speech. Note that some languages have more such ambiguity than others. Languages with little <a href="/wiki/Inflectional_morphology" title="Inflectional morphology" class="mw-redirect">inflectional morphology</a>, such as <a href="/wiki/English_language" title="English language">English</a>, are particularly prone to such ambiguity. <a href="/wiki/Chinese_language" title="Chinese language">Chinese</a> is prone to such ambiguity because it is a <a href="/wiki/Tonal_language" title="Tonal language" class="mw-redirect">tonal language</a>: the inflection expressed in speech is not readily conveyed by the entities employed within the orthography to convey the intended meaning.</li>
<li><a href="/wiki/Parsing" title="Parsing">Parsing</a>: Determine the <a href="/wiki/Parse_tree" title="Parse tree">parse tree</a> (grammatical analysis) of a given sentence. The <a href="/wiki/Grammar" title="Grammar">grammar</a> for <a href="/wiki/Natural_language" title="Natural language">natural languages</a> is <a href="/wiki/Ambiguous" title="Ambiguous" class="mw-redirect">ambiguous</a> and typical sentences have multiple possible analyses. In fact, perhaps surprisingly, for a typical sentence there may be thousands of potential parses (most of which will seem completely nonsensical to a human).</li>
<li><a href="/wiki/Question_answering" title="Question answering">Question answering</a>: Given a human-language question, determine its answer. Typical questions have a specific right answer (such as "What is the capital of Canada?"), but sometimes open-ended questions are also considered (such as "What is the meaning of life?").</li>
<li><a href="/wiki/Relationship_extraction" title="Relationship extraction">Relationship extraction</a>: Given a chunk of text, identify the relationships among named entities (e.g. who is the wife of whom).</li>
<li><a href="/wiki/Sentence_breaking" title="Sentence breaking" class="mw-redirect">Sentence breaking</a> (also known as <a href="/wiki/Sentence_boundary_disambiguation" title="Sentence boundary disambiguation">sentence boundary disambiguation</a>): Given a chunk of text, find the sentence boundaries. Sentence boundaries are often marked by <a href="/wiki/Full_stop" title="Full stop">periods</a> or other <a href="/wiki/Punctuation_mark" title="Punctuation mark" class="mw-redirect">punctuation marks</a>, but these same characters can serve other purposes (e.g. marking <a href="/wiki/Abbreviation" title="Abbreviation">abbreviations</a>).</li>
<li><a href="/wiki/Sentiment_analysis" title="Sentiment analysis">Sentiment analysis</a>: Extract subjective information usually from a set of documents, often using online reviews to determine "polarity" about specific objects. It is especially useful for identifying trends of public opinion in the social media, for the purpose of marketing.</li>
<li><a href="/wiki/Speech_recognition" title="Speech recognition">Speech recognition</a>: Given a sound clip of a person or people speaking, determine the textual representation of the speech. This is the opposite of <a href="/wiki/Text_to_speech" title="Text to speech" class="mw-redirect">text to speech</a> and is one of the extremely difficult problems colloquially termed "<a href="/wiki/AI-complete" title="AI-complete">AI-complete</a>" (see above). In <a href="/wiki/Natural_speech" title="Natural speech" class="mw-redirect">natural speech</a> there are hardly any pauses between successive words, and thus <a href="/wiki/Speech_segmentation" title="Speech segmentation">speech segmentation</a> is a necessary subtask of speech recognition (see below). Note also that in most spoken languages, the sounds representing successive letters blend into each other in a process termed <a href="/wiki/Coarticulation" title="Coarticulation">coarticulation</a>, so the conversion of the analog signal to discrete characters can be a very difficult process.</li>
<li><a href="/wiki/Speech_segmentation" title="Speech segmentation">Speech segmentation</a>: Given a sound clip of a person or people speaking, separate it into words. A subtask of <a href="/wiki/Speech_recognition" title="Speech recognition">speech recognition</a> and typically grouped with it.</li>
<li><a href="/wiki/Topic_segmentation" title="Topic segmentation" class="mw-redirect">Topic segmentation</a> and recognition: Given a chunk of text, separate it into segments each of which is devoted to a topic, and identify the topic of the segment.</li>
<li><a href="/wiki/Word_segmentation" title="Word segmentation" class="mw-redirect">Word segmentation</a>: Separate a chunk of continuous text into separate words. For a language like <a href="/wiki/English_language" title="English language">English</a>, this is fairly trivial, since words are usually separated by spaces. However, some written languages like <a href="/wiki/Chinese_language" title="Chinese language">Chinese</a>, <a href="/wiki/Japanese_language" title="Japanese language">Japanese</a> and <a href="/wiki/Thai_language" title="Thai language">Thai</a> do not mark word boundaries in such a fashion, and in those languages text segmentation is a significant task requiring knowledge of the <a href="/wiki/Vocabulary" title="Vocabulary">vocabulary</a> and <a href="/wiki/Morphology_(linguistics)" title="Morphology (linguistics)">morphology</a> of words in the language.</li>
<li><a href="/wiki/Word_sense_disambiguation" title="Word sense disambiguation" class="mw-redirect">Word sense disambiguation</a>: Many words have more than one <a href="/wiki/Meaning" title="Meaning">meaning</a>; we have to select the meaning which makes the most sense in context. For this problem, we are typically given a list of words and associated word senses, e.g. from a dictionary or from an online resource such as <a href="/wiki/WordNet" title="WordNet">WordNet</a>.</li>
</ul>
<p>In some cases, sets of related tasks are grouped into subfields of NLP that are often considered separately from NLP as a whole. Examples include:</p>
<ul>
<li><a href="/wiki/Information_retrieval" title="Information retrieval">Information retrieval</a> (IR): This is concerned with storing, searching and retrieving information. It is a separate field within computer science (closer to databases), but IR relies on some NLP methods (for example, stemming). Some current research and applications seek to bridge the gap between IR and NLP.</li>
<li><a href="/wiki/Information_extraction" title="Information extraction">Information extraction</a> (IE): This is concerned in general with the extraction of semantic information from text. This covers tasks such as <a href="/wiki/Named_entity_recognition" title="Named entity recognition">named entity recognition</a>, <a href="/w/index.php?title=Coreference_resolution&amp;action=edit&amp;redlink=1" class="new" title="Coreference resolution (page does not exist)">coreference resolution</a>, <a href="/wiki/Relationship_extraction" title="Relationship extraction">relationship extraction</a>, etc.</li>
<li><a href="/wiki/Speech_processing" title="Speech processing">Speech processing</a>: This covers <a href="/wiki/Speech_recognition" title="Speech recognition">speech recognition</a>, <a href="/wiki/Text-to-speech" title="Text-to-speech" class="mw-redirect">text-to-speech</a> and related tasks.</li>
</ul>
<p>Other tasks include:</p>
<ul>
<li><a href="/wiki/Stemming" title="Stemming">Stemming</a></li>
<li><a href="/wiki/Text_simplification" title="Text simplification">Text simplification</a></li>
<li><a href="/wiki/Text-to-speech" title="Text-to-speech" class="mw-redirect">Text-to-speech</a></li>
<li><a href="/wiki/Text-proofing" title="Text-proofing" class="mw-redirect">Text-proofing</a></li>
<li><a href="/wiki/Natural_language_user_interface" title="Natural language user interface">Natural language search</a></li>
<li><a href="/wiki/Query_expansion" title="Query expansion">Query expansion</a></li>
<li><a href="/wiki/Truecasing" title="Truecasing">Truecasing</a></li>
</ul>
<h2><span class="mw-headline" id="Statistical_NLP">Statistical NLP</span></h2>
<div class="rellink relarticle mainarticle">Main article: <a href="/wiki/Stochastic_grammar" title="Stochastic grammar">statistical natural language processing</a></div>
<p>Statistical natural-language processing uses <a href="/wiki/Stochastic" title="Stochastic">stochastic</a>, <a href="/wiki/Probabilistic" title="Probabilistic" class="mw-redirect">probabilistic</a> and <a href="/wiki/Statistical" title="Statistical" class="mw-redirect">statistical</a> methods to resolve some of the difficulties discussed above, especially those which arise because longer sentences are highly ambiguous when processed with realistic grammars, yielding thousands or millions of possible analyses. Methods for disambiguation often involve the use of <a href="/wiki/Corpus_linguistics" title="Corpus linguistics">corpora</a> and <a href="/wiki/Markov_model" title="Markov model">Markov models</a>. Statistical NLP comprises all quantitative approaches to automated <a href="/wiki/Language_processing" title="Language processing">language processing</a>, including probabilistic modeling, <a href="/wiki/Information_theory" title="Information theory">information theory</a>, and <a href="/wiki/Linear_algebra" title="Linear algebra">linear algebra</a>.<sup id="cite_ref-5" class="reference"><a href="#cite_note-5"><span>[</span>6<span>]</span></a></sup> The technology for statistical NLP comes mainly from <a href="/wiki/Machine_learning" title="Machine learning">machine learning</a> and <a href="/wiki/Data_mining" title="Data mining">data mining</a>, both of which are fields of <a href="/wiki/Artificial_intelligence" title="Artificial intelligence">artificial intelligence</a> that involve learning from data.</p>
<h2><span class="mw-headline" id="Evaluation_of_natural_language_processing">Evaluation of natural language processing</span></h2>
<h3><span class="mw-headline" id="Objectives">Objectives</span></h3>
<p>The goal of NLP evaluation is to measure one or more <i>qualities</i> of an algorithm or a system, in order to determine whether (or to what extent) the system answers the goals of its designers, or meets the needs of its users. Research in NLP evaluation has received considerable attention, because the definition of proper evaluation criteria is one way to specify precisely an NLP problem, going thus beyond the vagueness of tasks defined only as <i>language understanding</i> or <i>language generation.</i> A precise set of evaluation criteria, which includes mainly evaluation data and evaluation metrics, enables several teams to compare their solutions to a given NLP problem.</p>
<h3><span class="mw-headline" id="Short_history_of_evaluation_in_NLP">Short history of evaluation in NLP</span></h3>
<p>The first evaluation campaign on written texts seems to be a campaign dedicated to message understanding in 1987 (Pallet 1998). Then, the Parseval/GEIG project compared phrase-structure grammars (Black 1991). A series of campaigns within the Tipster project was carried out on tasks like summarization, translation and searching (Hirschman 1998). In 1994, in Germany, the Morpholympics compared German taggers. Then, the Senseval and Romanseval campaigns were conducted with the objectives of semantic disambiguation. In 1996, the Sparkle campaign compared syntactic parsers in four different languages (English, French, German and Italian). In France, the Grace project compared a set of 21 taggers for French in 1997 (Adda 1999). In 2004, during the <a href="/wiki/Technolangue/Easy" title="Technolangue/Easy">Technolangue/Easy</a> project, 13 parsers for French were compared. Large-scale evaluation of dependency parsers was performed in the context of the CoNLL shared tasks in 2006 and 2007. In Italy, the EVALITA campaign was conducted in 2007 and 2009 to compare various NLP and speech tools for Italian; the 2011 campaign is in full progress - <a href="http://www.evalita.org" class="external text" rel="nofollow">EVALITA web site</a>. In France, within the ANR-Passage project (end of 2007), 10 parsers for French were compared - <a href="http://atoll.inria.fr/passage/" class="external text" rel="nofollow">passage web site</a>.</p>
<p>Adda G., Mariani J., Paroubek P., Rajman M. 1999 L'action GRACE d'évaluation de l'assignation des parties du discours pour le français. Langues vol-2<br />
Black E., Abney S., Flickinger D., Gdaniec C., Grishman R., Harrison P., Hindle D., Ingria R., Jelinek F., Klavans J., Liberman M., Marcus M., Reukos S., Santoni B., Strzalkowski T. 1991 A procedure for quantitatively comparing the syntactic coverage of English grammars. DARPA Speech and Natural Language Workshop<br />
Hirschman L. 1998 Language understanding evaluation: lessons learned from MUC and ATIS. LREC Granada<br />
Pallet D.S. 1998 The NIST role in automatic speech recognition benchmark tests. LREC Granada</p>
<h3><span class="mw-headline" id="Different_types_of_evaluation">Different types of evaluation</span></h3>
<p>Depending on the evaluation procedures, a number of distinctions are traditionally made in NLP evaluation.</p>
<ul>
<li>Intrinsic vs. extrinsic evaluation</li>
</ul>
<p>Intrinsic evaluation considers an isolated NLP system and characterizes its performance mainly with respect to a <i>gold standard</i> result, pre-defined by the evaluators. Extrinsic evaluation, also called <i>evaluation in use</i> considers the NLP system in a more complex setting, either as an embedded system or serving a precise function for a human user. The extrinsic performance of the system is then characterized in terms of its utility with respect to the overall task of the complex system or the human user. For example, consider a syntactic parser that is based on the output of some new part of speech (POS) tagger. An intrinsic evaluation would run the POS tagger on some labelled data, and compare the system output of the POS tagger to the gold standard (correct) output. An extrinsic evaluation would run the parser with some other POS tagger, and then with the new POS tagger, and compare the parsing accuracy.</p>
<ul>
<li>Black-box vs. glass-box evaluation</li>
</ul>
<p>Black-box evaluation requires one to run an NLP system on a given data set and to measure a number of parameters related to the quality of the process (speed, reliability, resource consumption) and, most importantly, to the quality of the result (e.g. the accuracy of data annotation or the fidelity of a translation). Glass-box evaluation looks at the design of the system, the algorithms that are implemented, the linguistic resources it uses (e.g. vocabulary size), etc. Given the complexity of NLP problems, it is often difficult to predict performance only on the basis of glass-box evaluation, but this type of evaluation is more informative with respect to error analysis or future developments of a system.</p>
<ul>
<li>Automatic vs. manual evaluation</li>
</ul>
<p>In many cases, automatic procedures can be defined to evaluate an NLP system by comparing its output with the gold standard (or desired) one. Although the cost of producing the gold standard can be quite high, automatic evaluation can be repeated as often as needed without much additional cost (on the same input data). However, for many NLP problems, the definition of a gold standard is a complex task, and can prove impossible when inter-annotator agreement is insufficient. Manual evaluation is performed by human judges, who are instructed to estimate the quality of a system, or most often of a sample of its output, based on a number of criteria. Although, thanks to their linguistic competence, human judges can be considered as the reference for a number of language processing tasks, there is also considerable variation across their ratings. This is why automatic evaluation is sometimes referred to as <i>objective</i> evaluation, while the human kind appears to be more <i>subjective.</i></p>
<h3><span class="mw-headline" id="Shared_tasks_.28Campaigns.29">Shared tasks (Campaigns)</span></h3>
<ul>
<li><a href="/wiki/BioCreative" title="BioCreative">BioCreative</a></li>
<li><a href="/wiki/Message_Understanding_Conference" title="Message Understanding Conference">Message Understanding Conference</a></li>
<li><a href="/wiki/Technolangue/Easy" title="Technolangue/Easy">Technolangue/Easy</a></li>
<li><a href="/wiki/Text_Retrieval_Conference" title="Text Retrieval Conference">Text Retrieval Conference</a></li>
<li><a href="/wiki/SemEval" title="SemEval">Evaluation exercises on Semantic Evaluation (SemEval)</a></li>
<li><a href="http://research.ics.tkk.fi/events/morphochallenge2010/" class="external text" rel="nofollow">MorphoChallenge</a> Semi-supervised and Unsupervised Morpheme Analysis</li>
</ul>
<h2><span class="mw-headline" id="Standardization_in_NLP">Standardization in NLP</span></h2>
<p>An ISO sub-committee is working in order to ease interoperability between <a href="/wiki/Lexical_resource" title="Lexical resource">lexical resources</a> and NLP programs. The sub-committee is part of <a href="/wiki/ISO/TC37" title="ISO/TC37" class="mw-redirect">ISO/TC37</a> and is called ISO/TC37/SC4. Some ISO standards are already published but most of them are under construction, mainly on lexicon representation (see <a href="/wiki/Lexical_markup_framework" title="Lexical markup framework" class="mw-redirect">LMF</a>), annotation and data category registry.</p>
<h2><span class="mw-headline" id="Journals">Journals</span></h2>
<ul>
<li><a href="/wiki/Computational_Linguistics_(journal)" title="Computational Linguistics (journal)">Computational Linguistics</a></li>
<li><a href="/wiki/International_Conference_on_Language_Resources_and_Evaluation" title="International Conference on Language Resources and Evaluation">International Conference on Language Resources and Evaluation</a></li>
<li><a href="/wiki/Linguistic_Issues_in_Language_Technology" title="Linguistic Issues in Language Technology">Linguistic Issues in Language Technology</a></li>
</ul>
<h2><span class="mw-headline" id="Organizations_and_conferences">Organizations and conferences</span></h2>
<h3><span class="mw-headline" id="Associations">Associations</span></h3>
<ul>
<li><a href="/wiki/Association_for_Computational_Linguistics" title="Association for Computational Linguistics">Association for Computational Linguistics</a> (ACL)</li>
<li><a href="/w/index.php?title=Association_for_Machine_Translation_in_the_Americas&amp;action=edit&amp;redlink=1" class="new" title="Association for Machine Translation in the Americas (page does not exist)">Association for Machine Translation in the Americas</a> (AMTA)</li>
<li><a href="/wiki/AFNLP" title="AFNLP">AFNLP</a> - Asian Federation of Natural Language Processing Associations</li>
<li><a href="/wiki/Australasian_Language_Technology_Association" title="Australasian Language Technology Association">Australasian Language Technology Association</a> (ALTA)</li>
<li>Spanish Society of Natural Language Processing (<a href="/w/index.php?title=SEPLN&amp;action=edit&amp;redlink=1" class="new" title="SEPLN (page does not exist)">SEPLN</a>)</li>
<li>Mexican Association of Natural Language Processing (<a href="/w/index.php?title=AMPLN&amp;action=edit&amp;redlink=1" class="new" title="AMPLN (page does not exist)">AMPLN</a>)</li>
</ul>
<h3><span class="mw-headline" id="Conferences">Conferences</span></h3>
<p>Major conferences include:</p>
<ul>
<li>Annual Meeting of the Association for Computational Linguistics (aka ACL conference)</li>
<li>International Conference on Computational Linguistics (COLING)</li>
<li><a href="/wiki/International_Conference_on_Language_Resources_and_Evaluation" title="International Conference on Language Resources and Evaluation">International Conference on Language Resources and Evaluation</a> (LREC)</li>
<li>Conference on Intelligent Text Processing and Computational Linguistics (CICLing)</li>
<li>Empirical Methods on Natural Language Processing (EMNLP)</li>
</ul>
<h2><span class="mw-headline" id="Software_tools">Software tools</span></h2>
<div class="rellink relarticle mainarticle">Main article: <a href="/wiki/Natural_language_processing_toolkits" title="Natural language processing toolkits" class="mw-redirect">Natural language processing toolkits</a></div>
<ul>
<li><a href="/wiki/OpenNLP" title="OpenNLP">OpenNLP</a></li>
<li><a href="/wiki/General_Architecture_for_Text_Engineering" title="General Architecture for Text Engineering">General Architecture for Text Engineering</a> (GATE)</li>
<li><a href="/wiki/Unstructured_Information_Management_Architecture" title="Unstructured Information Management Architecture" class="mw-redirect">Unstructured Information Management Architecture</a> (UIMA)</li>
<li><a href="/wiki/Modular_Audio_Recognition_Framework" title="Modular Audio Recognition Framework">Modular Audio Recognition Framework</a></li>
<li><a href="/wiki/MontyLingua" title="MontyLingua">MontyLingua</a></li>
<li><a href="/wiki/Natural_Language_Toolkit" title="Natural Language Toolkit">Natural Language Toolkit</a> (NLTK): a <a href="/wiki/Python_(programming_language)" title="Python (programming language)">Python</a> library suite</li>
</ul>
<h2><span class="mw-headline" id="See_also">See also</span></h2>
<ul>
<li><a href="/wiki/Biomedical_text_mining" title="Biomedical text mining">Biomedical text mining</a></li>
<li><a href="/wiki/Compound_term_processing" title="Compound term processing">Compound term processing</a></li>
<li><a href="/wiki/Computer-assisted_reviewing" title="Computer-assisted reviewing">Computer-assisted reviewing</a></li>
<li><a href="/wiki/Controlled_natural_language" title="Controlled natural language">Controlled natural language</a></li>
<li><a href="/wiki/Foreign_language_reading_aid" title="Foreign language reading aid">Foreign language reading aid</a></li>
<li><a href="/wiki/Foreign_language_writing_aid" title="Foreign language writing aid">Foreign language writing aid</a></li>
<li><a href="/wiki/Language_technology" title="Language technology">Language technology</a></li>
<li><a href="/wiki/Latent_semantic_indexing" title="Latent semantic indexing">Latent semantic indexing</a></li>
<li><a href="/wiki/Natural_language_programming" title="Natural language programming">Natural language programming</a></li>
<li><a href="/wiki/Transderivational_search" title="Transderivational search">Transderivational search</a></li>
<li><a href="/wiki/Reification_(linguistics)" title="Reification (linguistics)">Reification (linguistics)</a></li>
<li><a href="/wiki/Spoken_dialogue_system" title="Spoken dialogue system" class="mw-redirect">Spoken dialogue system</a></li>
<li><a href="/wiki/Watson_(artificial_intelligence_software)" title="Watson (artificial intelligence software)" class="mw-redirect">Watson (artificial intelligence software)</a></li>
</ul>
<h2><span class="mw-headline" id="References">References</span></h2>
<div class="reflist" style="list-style-type: decimal;">
<ol class="references">
<li id="cite_note-0"><b><a href="#cite_ref-0">^</a></b> Charniak, Eugene: <i>Introduction to artificial intelligence</i>, page 2. Addison-Wesley, 1984.</li>
<li id="cite_note-Kongthon-1"><b><a href="#cite_ref-Kongthon_1-0">^</a></b> <a href="http://portal.acm.org/citation.cfm?id=1643823.1643908" class="external text" rel="nofollow">Implementing an online help desk system based on conversational agent</a> Authors: Alisa Kongthon, Chatchawal Sangkeettrakarn, Sarawoot Kongyoung and Choochart Haruechaiyasak. Published by ACM 2009 Article, Bibliometrics Data Bibliometrics. Published in: Proceeding, MEDES '09 Proceedings of the International Conference on Management of Emergent Digital EcoSystems, ACM New York, NY, USA. <a href="/wiki/Special:BookSources/9781605588292" class="internal mw-magiclink-isbn">ISBN 978-1-60558-829-2</a>, doi:10.1145/1643823.1643908</li>
<li id="cite_note-2"><b><a href="#cite_ref-2">^</a></b> Hutchins, J. (2005)</li>
<li id="cite_note-3"><b><a href="#cite_ref-3">^</a></b> Chomskyan linguistics encourages the investigation of "<a href="/wiki/Corner_case" title="Corner case">corner cases</a>" that stress the limits of its theoretical models (comparable to <a href="/wiki/Pathological_(mathematics)" title="Pathological (mathematics)">pathological</a> phenomena in mathematics), typically created using <a href="/wiki/Thought_experiment" title="Thought experiment">thought experiments</a>, rather than the systematic investigation of typical phenomena that occur in real-world data, as is the case in <a href="/wiki/Corpus_linguistics" title="Corpus linguistics">corpus linguistics</a>. The creation and use of such <a href="/wiki/Text_corpus" title="Text corpus">corpora</a> of real-world data is a fundamental part of machine-learning algorithms for NLP. In addition, theoretical underpinnings of Chomskyan linguistics such as the so-called "<a href="/wiki/Poverty_of_the_stimulus" title="Poverty of the stimulus">poverty of the stimulus</a>" argument entail that general learning algorithms, as are typically used in machine learning, cannot be successful in language processing. As a result, the Chomskyan paradigm discouraged the application of such models to language processing.</li>
<li id="cite_note-4"><b><a href="#cite_ref-4">^</a></b> Yucong Duan, Christophe Cruz (2011), <i><a href="http://www.ijimt.org/abstract/100-E00187.htm" class="external text" rel="nofollow">Formalizing Semantic of Natural Language through Conceptualization from Existence</a></i>. International Journal of Innovation, Management and Technology(2011) 2 (1), pp. 37-42.</li>
<li id="cite_note-5"><b><a href="#cite_ref-5">^</a></b> Christopher D. Manning, Hinrich Schütze: <i>Foundations of Statistical Natural Language Processing</i>, MIT Press (1999), <a href="/wiki/Special:BookSources/9780262133609" class="internal mw-magiclink-isbn">ISBN 978-0-262-13360-9</a>, p. xxxi</li>
</ol>
</div>
<h3><span class="mw-headline" id="Related_academic_articles">Related academic articles</span></h3>
<ul>
<li>Bates, M. (1995). Models of natural language understanding. Proceedings of the National Academy of Sciences of the United States of America, Vol. 92, No. 22 (Oct. 24, 1995), pp.&#160;9977–9982.</li>
</ul>
<h2><span class="mw-headline" id="External_links">External links</span></h2>
<h3><span class="mw-headline" id="Resources">Resources</span></h3>
<ul>
<li><a href="http://aclweb.org/anthology-new/" class="external text" rel="nofollow">Computational Linguistics Publications</a></li>
<li><a href="http://aclweb.org/aclwiki" class="external text" rel="nofollow">Computational Linguistics Resources</a></li>
<li><a href="http://www.cs.technion.ac.il/~gabr/resources/resources.html" class="external text" rel="nofollow">Resources for Text, Speech and Language Processing</a></li>
<li><a href="http://www.CICLing.org/" class="external text" rel="nofollow">CICLing annual conferences on Intelligent Text Processing and Computational Linguistics</a></li>
<li><i><a href="http://www.gelbukh.com/clbook/" class="external text" rel="nofollow">Computation Linguistics: Models, Resources, Applications</a></i> (online text)</li>
<li><a href="https://kitwiki.csc.fi/twiki/bin/view/FiLT/FiLTWikiEn" class="external text" rel="nofollow">Language Technology Documentation Centre in Finland (FiLT)</a></li>
<li><a href="http://specgram.com/CLIII.4/08.phlogiston.cartoon.zhe.html" class="external text" rel="nofollow">Some simple examples of NLP-hard utterances.</a></li>
<li><a href="http://www.nltk.org/getting-started" class="external text" rel="nofollow">Natural Language Toolkit</a></li>
<li><a href="http://code.google.com/p/graph-expression/wiki/Examples" class="external text" rel="nofollow">GExp: Rule based information extraction toolkit</a></li>
<li><a href="/wiki/OpenNLP" title="OpenNLP">OpenNLP</a></li>
</ul>
<h3><span class="mw-headline" id="Organizations">Organizations</span></h3>
<ul>
<li><a href="http://www.aclweb.org" class="external text" rel="nofollow">The Association for Computational Linguistics</a></li>
<li><a href="http://nlg.isi.edu/" class="external text" rel="nofollow">The Natural Language Group at the USC Information Sciences Institute</a></li>
<li><a href="http://clsp.jhu.edu" class="external text" rel="nofollow">The Center for Language and Speech Processing at The Johns Hopkins University</a></li>
<li><a href="http://nlp.stanford.edu/" class="external text" rel="nofollow">The Stanford Natural Language Processing Group</a></li>
<li><a href="http://l2r.cs.uiuc.edu/~cogcomp/" class="external text" rel="nofollow">The Cognitive Computation Group</a></li>
<li><a href="http://nlp.cic.ipn.mx" class="external text" rel="nofollow">NLP group of CIC-IPN; many publications available online</a></li>
<li><a href="http://clac.cs.concordia.ca" class="external text" rel="nofollow">CLaC -- Computation Linguistics at Concordia</a>, <a href="/wiki/Concordia_University_(Montreal)" title="Concordia University (Montreal)" class="mw-redirect">Concordia University</a></li>
<li><a href="http://nlpapplications.com/" class="external text" rel="nofollow">NLP International</a></li>
<li><a href="http://lucid.cpmc.columbia.edu/medlee/" class="external text" rel="nofollow">MedLEE</a> Natural Language Processing for medical narratives developed by <a href="/w/index.php?title=Carol_Friedman&amp;action=edit&amp;redlink=1" class="new" title="Carol Friedman (page does not exist)">Carol Friedman</a> at <a href="/wiki/Columbia_University" title="Columbia University">Columbia University</a> and available commercially through</li>
<li><a href="http://www.meshlabsinc.com" class="external text" rel="nofollow">MeshLabs</a> - MeshLabs develops text analytics solutions that discover information from unstructured data and deliver highly relevant personalized knowledge and actionable insights from any given content source, channel, and type.</li>
</ul>


<!-- 
NewPP limit report
Preprocessor node count: 1821/1000000
Post-expand include size: 8971/2048000 bytes
Template argument size: 3355/2048000 bytes
Expensive parser function count: 0/500
-->

<!-- Saved in parser cache with key enwiki:pcache:idhash:21652-0!*!0!!en!4!edit=0 and timestamp 20110912140032 -->
<div class="printfooter">
Retrieved from "<a href="http://en.wikipedia.org/wiki/Natural_language_processing">http://en.wikipedia.org/wiki/Natural_language_processing</a>"</div>
				<!-- /bodytext -->
								<!-- catlinks -->
				<div id='catlinks' class='catlinks'><div id="mw-normal-catlinks"><a href="/wiki/Special:Categories" title="Special:Categories">Categories</a>: <span dir='ltr'><a href="/wiki/Category:Computational_linguistics" title="Category:Computational linguistics">Computational linguistics</a></span> | <span dir='ltr'><a href="/wiki/Category:Speech_recognition" title="Category:Speech recognition">Speech recognition</a></span> | <span dir='ltr'><a href="/wiki/Category:Natural_language_processing" title="Category:Natural language processing">Natural language processing</a></span></div><div id="mw-hidden-catlinks" class="mw-hidden-cats-hidden">Hidden categories: <span dir='ltr'><a href="/wiki/Category:Articles_needing_additional_references_from_July_2008" title="Category:Articles needing additional references from July 2008">Articles needing additional references from July 2008</a></span> | <span dir='ltr'><a href="/wiki/Category:All_articles_needing_additional_references" title="Category:All articles needing additional references">All articles needing additional references</a></span> | <span dir='ltr'><a href="/wiki/Category:Wikipedia_articles_needing_reorganization_from_July_2008" title="Category:Wikipedia articles needing reorganization from July 2008">Wikipedia articles needing reorganization from July 2008</a></span></div></div>				<!-- /catlinks -->
												<div class="visualClear"></div>
			</div>
			<!-- /bodyContent -->
		</div>
		<!-- /content -->
		<!-- header -->
		<div id="mw-head" class="noprint">
			
<!-- 0 -->
<div id="p-personal" class="">
	<h5>Personal tools</h5>
	<ul>
					<li  id="pt-login"><a href="/w/index.php?title=Special:UserLogin&amp;returnto=Natural_language_processing&amp;returntoquery=printable%3Dyes" title="You are encouraged to log in; however, it is not mandatory. [o]" accesskey="o">Log in / create account</a></li>
			</ul>
</div>

<!-- /0 -->
			<div id="left-navigation">
				
<!-- 0 -->
<div id="p-namespaces" class="vectorTabs">
	<h5>Namespaces</h5>
	<ul>
					<li  id="ca-nstab-main" class="selected"><span><a href="/wiki/Natural_language_processing"  title="View the content page [c]" accesskey="c">Article</a></span></li>
					<li  id="ca-talk"><span><a href="/wiki/Talk:Natural_language_processing"  title="Discussion about the content page [t]" accesskey="t">Discussion</a></span></li>
			</ul>
</div>

<!-- /0 -->

<!-- 1 -->
<div id="p-variants" class="vectorMenu emptyPortlet">
		<h5><span>Variants</span><a href="#"></a></h5>
	<div class="menu">
		<ul>
					</ul>
	</div>
</div>

<!-- /1 -->
			</div>
			<div id="right-navigation">
				
<!-- 0 -->
<div id="p-views" class="vectorTabs">
	<h5>Views</h5>
	<ul>
					<li id="ca-view" class="selected"><span><a href="/wiki/Natural_language_processing" >Read</a></span></li>
					<li id="ca-edit"><span><a href="/w/index.php?title=Natural_language_processing&amp;action=edit"  title="You can edit this page. &#10;Please use the preview button before saving. [e]" accesskey="e">Edit</a></span></li>
					<li id="ca-history" class="collapsible "><span><a href="/w/index.php?title=Natural_language_processing&amp;action=history"  title="Past versions of this page [h]" accesskey="h">View history</a></span></li>
			</ul>
</div>

<!-- /0 -->

<!-- 1 -->
<div id="p-cactions" class="vectorMenu emptyPortlet">
	<h5><span>Actions</span><a href="#"></a></h5>
	<div class="menu">
		<ul>
					</ul>
	</div>
</div>

<!-- /1 -->

<!-- 2 -->
<div id="p-search">
	<h5><label for="searchInput">Search</label></h5>
	<form action="/w/index.php" id="searchform">
		<input type='hidden' name="title" value="Special:Search"/>
				<div id="simpleSearch">
						<input id="searchInput" name="search" type="text"  title="Search Wikipedia [f]" accesskey="f"  value="" />
						<button id="searchButton" type='submit' name='button'  title="Search Wikipedia for this text"><img src="http://bits.wikimedia.org/skins-1.17/vector/images/search-ltr.png?301-3" alt="Search" /></button>
					</div>
			</form>
</div>

<!-- /2 -->
			</div>
		</div>
		<!-- /header -->
		<!-- panel -->
			<div id="mw-panel" class="noprint">
				<!-- logo -->
					<div id="p-logo"><a style="background-image: url(http://upload.wikimedia.org/wikipedia/en/b/bc/Wiki.png);" href="/wiki/Main_Page"  title="Visit the main page"></a></div>
				<!-- /logo -->
				
<!-- navigation -->
<div class="portal" id='p-navigation'>
	<h5>Navigation</h5>
	<div class="body">
				<ul>
					<li id="n-mainpage-description"><a href="/wiki/Main_Page" title="Visit the main page [z]" accesskey="z">Main page</a></li>
					<li id="n-contents"><a href="/wiki/Portal:Contents" title="Guides to browsing Wikipedia">Contents</a></li>
					<li id="n-featuredcontent"><a href="/wiki/Portal:Featured_content" title="Featured content – the best of Wikipedia">Featured content</a></li>
					<li id="n-currentevents"><a href="/wiki/Portal:Current_events" title="Find background information on current events">Current events</a></li>
					<li id="n-randompage"><a href="/wiki/Special:Random" title="Load a random article [x]" accesskey="x">Random article</a></li>
					<li id="n-sitesupport"><a href="http://wikimediafoundation.org/wiki/Special:Landingcheck?landing_page=WMFJA085&amp;language=en&amp;utm_source=donate&amp;utm_medium=sidebar&amp;utm_campaign=20101204SB002" title="Support us">Donate to Wikipedia</a></li>
				</ul>
			</div>
</div>

<!-- /navigation -->

<!-- SEARCH -->

<!-- /SEARCH -->

<!-- interaction -->
<div class="portal" id='p-interaction'>
	<h5>Interaction</h5>
	<div class="body">
				<ul>
					<li id="n-help"><a href="/wiki/Help:Contents" title="Guidance on how to use and edit Wikipedia">Help</a></li>
					<li id="n-aboutsite"><a href="/wiki/Wikipedia:About" title="Find out about Wikipedia">About Wikipedia</a></li>
					<li id="n-portal"><a href="/wiki/Wikipedia:Community_portal" title="About the project, what you can do, where to find things">Community portal</a></li>
					<li id="n-recentchanges"><a href="/wiki/Special:RecentChanges" title="The list of recent changes in the wiki [r]" accesskey="r">Recent changes</a></li>
					<li id="n-contact"><a href="/wiki/Wikipedia:Contact_us" title="How to contact Wikipedia">Contact Wikipedia</a></li>
				</ul>
			</div>
</div>

<!-- /interaction -->

<!-- TOOLBOX -->
<div class="portal" id="p-tb">
	<h5>Toolbox</h5>
	<div class="body">
		<ul>
					<li id="t-whatlinkshere"><a href="/wiki/Special:WhatLinksHere/Natural_language_processing" title="List of all English Wikipedia pages containing links to this page [j]" accesskey="j">What links here</a></li>
						<li id="t-recentchangeslinked"><a href="/wiki/Special:RecentChangesLinked/Natural_language_processing" title="Recent changes in pages linked from this page [k]" accesskey="k">Related changes</a></li>
																																					<li id="t-upload"><a href="/wiki/Wikipedia:Upload" title="Upload files [u]" accesskey="u">Upload file</a></li>
											<li id="t-specialpages"><a href="/wiki/Special:SpecialPages" title="List of all special pages [q]" accesskey="q">Special pages</a></li>
											<li id="t-permalink"><a href="/w/index.php?title=Natural_language_processing&amp;oldid=450080870" title="Permanent link to this revision of the page">Permanent link</a></li>
				<li id="t-cite"><a href="/w/index.php?title=Special:Cite&amp;page=Natural_language_processing&amp;id=450080870" title="Information on how to cite this page">Cite this page</a></li>		</ul>
	</div>
</div>

<!-- /TOOLBOX -->

<!-- coll-print_export -->
<div class="portal" id='p-coll-print_export'>
	<h5>Print/export</h5>
	<div class="body">
				<ul id="collectionPortletList"><li id="coll-create_a_book"><a href="/w/index.php?title=Special:Book&amp;bookcmd=book_creator&amp;referer=Natural+language+processing" title="Create a book or page collection" rel="nofollow">Create a book</a></li><li id="coll-download-as-rl"><a href="/w/index.php?title=Special:Book&amp;bookcmd=render_article&amp;arttitle=Natural+language+processing&amp;oldid=450080870&amp;writer=rl" title="Download a PDF version of this wiki page" rel="nofollow">Download as PDF</a></li></ul>			</div>
</div>

<!-- /coll-print_export -->

<!-- LANGUAGES -->
<div class="portal" id="p-lang">
	<h5>Languages</h5>
	<div class="body">
		<ul>
					<li class="interwiki-ar"><a href="http://ar.wikipedia.org/wiki/%D9%85%D8%B9%D8%A7%D9%84%D8%AC%D8%A9_%D8%A7%D9%84%D9%84%D8%BA%D8%A7%D8%AA_%D8%A7%D9%84%D8%B7%D8%A8%D9%8A%D8%B9%D9%8A%D8%A9" title="معالجة اللغات الطبيعية">العربية</a></li>
					<li class="interwiki-zh-min-nan"><a href="http://zh-min-nan.wikipedia.org/wiki/Ch%C5%AB-ji%C3%A2n_g%C3%BA-gi%C3%A2n_chh%C3%BA-l%C3%AD" title="Chū-jiân gú-giân chhú-lí">Bân-lâm-gú</a></li>
					<li class="interwiki-be"><a href="http://be.wikipedia.org/wiki/%D0%90%D0%BF%D1%80%D0%B0%D1%86%D0%BE%D1%9E%D0%BA%D0%B0_%D0%BD%D0%B0%D1%82%D1%83%D1%80%D0%B0%D0%BB%D1%8C%D0%BD%D0%B0%D0%B9_%D0%BC%D0%BE%D0%B2%D1%8B" title="Апрацоўка натуральнай мовы">Беларуская</a></li>
					<li class="interwiki-be-x-old"><a href="http://be-x-old.wikipedia.org/wiki/%D0%90%D0%BF%D1%80%D0%B0%D1%86%D0%BE%D1%9E%D0%BA%D0%B0_%D0%BD%D0%B0%D1%82%D1%83%D1%80%D0%B0%D0%BB%D1%8C%D0%BD%D0%B0%D0%B9_%D0%BC%D0%BE%D0%B2%D1%8B" title="Апрацоўка натуральнай мовы">‪Беларуская (тарашкевіца)‬</a></li>
					<li class="interwiki-bg"><a href="http://bg.wikipedia.org/wiki/%D0%9E%D0%B1%D1%80%D0%B0%D0%B1%D0%BE%D1%82%D0%BA%D0%B0_%D0%BD%D0%B0_%D0%B5%D1%81%D1%82%D0%B5%D1%81%D1%82%D0%B2%D0%B5%D0%BD_%D0%B5%D0%B7%D0%B8%D0%BA" title="Обработка на естествен език">Български</a></li>
					<li class="interwiki-ca"><a href="http://ca.wikipedia.org/wiki/Processament_de_llenguatge_natural" title="Processament de llenguatge natural">Català</a></li>
					<li class="interwiki-cs"><a href="http://cs.wikipedia.org/wiki/Zpracov%C3%A1n%C3%AD_p%C5%99irozen%C3%A9ho_jazyka" title="Zpracování přirozeného jazyka">Česky</a></li>
					<li class="interwiki-da"><a href="http://da.wikipedia.org/wiki/Sprogteknologi" title="Sprogteknologi">Dansk</a></li>
					<li class="interwiki-de"><a href="http://de.wikipedia.org/wiki/Computerlinguistik" title="Computerlinguistik">Deutsch</a></li>
					<li class="interwiki-es"><a href="http://es.wikipedia.org/wiki/Procesamiento_de_lenguajes_naturales" title="Procesamiento de lenguajes naturales">Español</a></li>
					<li class="interwiki-eu"><a href="http://eu.wikipedia.org/wiki/Lengoaia_naturalen_prozesamendu" title="Lengoaia naturalen prozesamendu">Euskara</a></li>
					<li class="interwiki-fa"><a href="http://fa.wikipedia.org/wiki/%D9%BE%D8%B1%D8%AF%D8%A7%D8%B2%D8%B4_%D8%B2%D8%A8%D8%A7%D9%86%E2%80%8C%D9%87%D8%A7%DB%8C_%D8%B7%D8%A8%DB%8C%D8%B9%DB%8C" title="پردازش زبان‌های طبیعی">فارسی</a></li>
					<li class="interwiki-fr"><a href="http://fr.wikipedia.org/wiki/Traitement_automatique_du_langage_naturel" title="Traitement automatique du langage naturel">Français</a></li>
					<li class="interwiki-gl"><a href="http://gl.wikipedia.org/wiki/Procesamento_da_linguaxe_natural" title="Procesamento da linguaxe natural">Galego</a></li>
					<li class="interwiki-ko"><a href="http://ko.wikipedia.org/wiki/%EC%9E%90%EC%97%B0_%EC%96%B8%EC%96%B4_%EC%B2%98%EB%A6%AC" title="자연 언어 처리">한국어</a></li>
					<li class="interwiki-hi"><a href="http://hi.wikipedia.org/wiki/%E0%A4%AA%E0%A5%8D%E0%A4%B0%E0%A4%BE%E0%A4%95%E0%A5%83%E0%A4%A4%E0%A4%BF%E0%A4%95_%E0%A4%AD%E0%A4%BE%E0%A4%B7%E0%A4%BE_%E0%A4%B8%E0%A4%82%E0%A4%B8%E0%A4%BE%E0%A4%A7%E0%A4%A8" title="प्राकृतिक भाषा संसाधन">हिन्दी</a></li>
					<li class="interwiki-id"><a href="http://id.wikipedia.org/wiki/Pemrosesan_bahasa_alami" title="Pemrosesan bahasa alami">Bahasa Indonesia</a></li>
					<li class="interwiki-it"><a href="http://it.wikipedia.org/wiki/Elaborazione_del_linguaggio_naturale" title="Elaborazione del linguaggio naturale">Italiano</a></li>
					<li class="interwiki-he"><a href="http://he.wikipedia.org/wiki/%D7%A2%D7%99%D7%91%D7%95%D7%93_%D7%A9%D7%A4%D7%94_%D7%98%D7%91%D7%A2%D7%99%D7%AA" title="עיבוד שפה טבעית">עברית</a></li>
					<li class="interwiki-lt"><a href="http://lt.wikipedia.org/wiki/Nat%C5%ABralios_kalbos_apdorojimas" title="Natūralios kalbos apdorojimas">Lietuvių</a></li>
					<li class="interwiki-ja"><a href="http://ja.wikipedia.org/wiki/%E8%87%AA%E7%84%B6%E8%A8%80%E8%AA%9E%E5%87%A6%E7%90%86" title="自然言語処理">日本語</a></li>
					<li class="interwiki-pl"><a href="http://pl.wikipedia.org/wiki/Przetwarzanie_j%C4%99zyka_naturalnego" title="Przetwarzanie języka naturalnego">Polski</a></li>
					<li class="interwiki-pt"><a href="http://pt.wikipedia.org/wiki/Processamento_de_linguagem_natural" title="Processamento de linguagem natural">Português</a></li>
					<li class="interwiki-ro"><a href="http://ro.wikipedia.org/wiki/Prelucrarea_limbajului_natural" title="Prelucrarea limbajului natural">Română</a></li>
					<li class="interwiki-ru"><a href="http://ru.wikipedia.org/wiki/%D0%9E%D0%B1%D1%80%D0%B0%D0%B1%D0%BE%D1%82%D0%BA%D0%B0_%D0%B5%D1%81%D1%82%D0%B5%D1%81%D1%82%D0%B2%D0%B5%D0%BD%D0%BD%D0%BE%D0%B3%D0%BE_%D1%8F%D0%B7%D1%8B%D0%BA%D0%B0" title="Обработка естественного языка">Русский</a></li>
					<li class="interwiki-simple"><a href="http://simple.wikipedia.org/wiki/Natural_language_processing" title="Natural language processing">Simple English</a></li>
					<li class="interwiki-sr"><a href="http://sr.wikipedia.org/wiki/Obrada_prirodnih_jezika" title="Obrada prirodnih jezika">Српски / Srpski</a></li>
					<li class="interwiki-ta"><a href="http://ta.wikipedia.org/wiki/%E0%AE%87%E0%AE%AF%E0%AE%B1%E0%AF%8D%E0%AE%95%E0%AF%88_%E0%AE%AE%E0%AF%8A%E0%AE%B4%E0%AE%BF_%E0%AE%AE%E0%AF%81%E0%AE%B1%E0%AF%88%E0%AE%AF%E0%AE%BE%E0%AE%95%E0%AF%8D%E0%AE%95%E0%AE%AE%E0%AF%8D" title="இயற்கை மொழி முறையாக்கம்">தமிழ்</a></li>
					<li class="interwiki-th"><a href="http://th.wikipedia.org/wiki/%E0%B8%81%E0%B8%B2%E0%B8%A3%E0%B8%9B%E0%B8%A3%E0%B8%B0%E0%B8%A1%E0%B8%A7%E0%B8%A5%E0%B8%9C%E0%B8%A5%E0%B8%A0%E0%B8%B2%E0%B8%A9%E0%B8%B2%E0%B8%98%E0%B8%A3%E0%B8%A3%E0%B8%A1%E0%B8%8A%E0%B8%B2%E0%B8%95%E0%B8%B4" title="การประมวลผลภาษาธรรมชาติ">ไทย</a></li>
					<li class="interwiki-tr"><a href="http://tr.wikipedia.org/wiki/Do%C4%9Fal_dil_i%C5%9Fleme" title="Doğal dil işleme">Türkçe</a></li>
					<li class="interwiki-uk"><a href="http://uk.wikipedia.org/wiki/%D0%9E%D0%B1%D1%80%D0%BE%D0%B1%D0%BA%D0%B0_%D0%BF%D1%80%D0%B8%D1%80%D0%BE%D0%B4%D0%BD%D0%BE%D1%97_%D0%BC%D0%BE%D0%B2%D0%B8" title="Обробка природної мови">Українська</a></li>
					<li class="interwiki-vi"><a href="http://vi.wikipedia.org/wiki/X%E1%BB%AD_l%C3%BD_ng%C3%B4n_ng%E1%BB%AF_t%E1%BB%B1_nhi%C3%AAn" title="Xử lý ngôn ngữ tự nhiên">Tiếng Việt</a></li>
					<li class="interwiki-zh"><a href="http://zh.wikipedia.org/wiki/%E8%87%AA%E7%84%B6%E8%AF%AD%E8%A8%80%E5%A4%84%E7%90%86" title="自然语言处理">中文</a></li>
				</ul>
	</div>
</div>

<!-- /LANGUAGES -->
			</div>
		<!-- /panel -->
		<!-- footer -->
		<div id="footer">
											<ul id="footer-info">
																	<li id="footer-info-lastmod"> This page was last modified on 12 September 2011 at 14:00.<br /></li>
																							<li id="footer-info-copyright">Text is available under the <a rel="license" href="http://en.wikipedia.org/wiki/Wikipedia:Text_of_Creative_Commons_Attribution-ShareAlike_3.0_Unported_License">Creative Commons Attribution-ShareAlike License</a><a rel="license" href="http://creativecommons.org/licenses/by-sa/3.0/" style="display:none;"></a>;
additional terms may apply.
See <a href="http://wikimediafoundation.org/wiki/Terms_of_use">Terms of use</a> for details.<br/>
Wikipedia&reg; is a registered trademark of the <a href="http://www.wikimediafoundation.org/">Wikimedia Foundation, Inc.</a>, a non-profit organization.<br /></li><li class="noprint"><a class='internal' href="http://en.wikipedia.org/wiki/Wikipedia:Contact_us">Contact us</a></li>
															</ul>
															<ul id="footer-places">
																	<li id="footer-places-privacy"><a href="http://wikimediafoundation.org/wiki/Privacy_policy" title="wikimedia:Privacy policy">Privacy policy</a></li>
																							<li id="footer-places-about"><a href="/wiki/Wikipedia:About" title="Wikipedia:About">About Wikipedia</a></li>
																							<li id="footer-places-disclaimer"><a href="/wiki/Wikipedia:General_disclaimer" title="Wikipedia:General disclaimer">Disclaimers</a></li>
																							<li id="footer-places-mobileview"><a href='/w/index.php?title=Natural_language_processing&amp;printable=yes&amp;useformat=mobile'>Mobile view</a></li>
															</ul>
											<ul id="footer-icons" class="noprint">
					<li id="footer-copyrightico">
						<a href="http://wikimediafoundation.org/"><img src="http://bits.wikimedia.org/images/wikimedia-button.png" width="88" height="31" alt="Wikimedia Foundation"/></a>
					</li>
					<li id="footer-poweredbyico">
						<a href="http://www.mediawiki.org/"><img src="http://bits.wikimedia.org/skins-1.17/common/images/poweredby_mediawiki_88x31.png" alt="Powered by MediaWiki" width="88" height="31" /></a>
					</li>
				</ul>
						<div style="clear:both"></div>
		</div>
		<!-- /footer -->
		<script type="text/javascript">if ( window.mediaWiki ) {
	// Guarded on window.mediaWiki so nothing runs if the core ResourceLoader
	// startup script failed to load. Queue the page's skin/extension modules,
	// then kick off fetching the queued batch.
	mediaWiki.loader.load(["mediawiki.legacy.wikibits", "mediawiki.util", "mediawiki.legacy.ajax", "mediawiki.legacy.mwsuggest", "ext.vector.collapsibleNav", "ext.vector.collapsibleTabs", "ext.vector.editWarning", "ext.vector.simpleSearch", "ext.UserBuckets", "ext.articleFeedback.startup"]);
	mediaWiki.loader.go();
}
</script>

<script src="/w/index.php?title=Special:BannerController&amp;cache=/cn.js&amp;301-3" type="text/javascript"></script>
<script src="http://bits.wikimedia.org/en.wikipedia.org/load.php?debug=false&amp;lang=en&amp;modules=site&amp;only=scripts&amp;printable=1&amp;skin=vector" type="text/javascript"></script>
<script type="text/javascript">if ( window.mediaWiki ) {
	// Server-generated (minified) blob: seeds the anonymous-user preference
	// defaults into mediaWiki.user.options, then marks the "user.options"
	// ResourceLoader module as "ready" so modules depending on it can run.
	// NOTE(review): do not hand-edit the JSON — it is emitted and cache-keyed
	// by the ResourceLoader minifier (see cache-key comment below).
	mediaWiki.user.options.set({"ccmeonemails":0,"cols":80,"contextchars":50,"contextlines":5,"date":"default","diffonly":0,"disablemail":0,"disablesuggest":0,"editfont":"default","editondblclick":0,"editsection":1,"editsectiononrightclick":0,"enotifminoredits":0,"enotifrevealaddr":0,"enotifusertalkpages":1,"enotifwatchlistpages":0,"extendwatchlist":0,"externaldiff":0,"externaleditor":0,"fancysig":0,"forceeditsummary":0,"gender":"unknown","hideminor":0,"hidepatrolled":0,"highlightbroken":1,"imagesize":2,"justify":0,"math":1,"minordefault":0,"newpageshidepatrolled":0,"nocache":0,"noconvertlink":0,"norollbackdiff":0,"numberheadings":0,"previewonfirst":0,"previewontop":1,"quickbar":1,"rcdays":7,"rclimit":50,"rememberpassword":0,"rows":25,"searchlimit":20,"showhiddencats":false,"showjumplinks":1,"shownumberswatching":1,"showtoc":1,"showtoolbar":1,"skin":"vector","stubthreshold":0,"thumbsize":4,"underline":2,"uselivepreview":0,"usenewrc":0,"watchcreations":1,"watchdefault":0,"watchdeletion":0,
	"watchlistdays":"3","watchlisthideanons":0,"watchlisthidebots":0,"watchlisthideliu":0,"watchlisthideminor":0,"watchlisthideown":0,"watchlisthidepatrolled":0,"watchmoves":0,"wllimit":250,"flaggedrevssimpleui":1,"flaggedrevsstable":false,"flaggedrevseditdiffs":true,"flaggedrevsviewdiffs":false,"vector-simplesearch":1,"useeditwarning":1,"vector-collapsiblenav":1,"usebetatoolbar":1,"usebetatoolbar-cgd":1,"wikilove-enabled":1,"variant":"en","language":"en","searchNs0":true,"searchNs1":false,"searchNs2":false,"searchNs3":false,"searchNs4":false,"searchNs5":false,"searchNs6":false,"searchNs7":false,"searchNs8":false,"searchNs9":false,"searchNs10":false,"searchNs11":false,"searchNs12":false,"searchNs13":false,"searchNs14":false,"searchNs15":false,"searchNs100":false,"searchNs101":false,"searchNs108":false,"searchNs109":false});;mediaWiki.loader.state({"user.options":"ready"});
	
	/* cache key: enwiki:resourceloader:filter:minify-js:5:c183491fdc987ec95b8873a74ef2bb96 */
}
</script><script type="text/javascript" src="//geoiplookup.wikimedia.org/"></script>		<!-- fixalpha -->
		<script type="text/javascript"> /* Legacy IE 5.5 hook: isMSIE55 and fixalpha() are defined by skin JS loaded above — presumably a PNG alpha-transparency workaround (name suggests so; not verifiable from this file). */ if ( window.isMSIE55 ) fixalpha(); </script>
		<!-- /fixalpha -->
		<!-- Served by srv177 in 0.096 secs. -->			</body>
</html>
